{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Set up data directories prior to using fastai libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Python Version: 3.5.2 |Anaconda custom (x86_64)| (default, Jul  2 2016, 17:52:12) \n",
      "[GCC 4.2.1 Compatible Apple LLVM 4.2 (clang-425.0.28)]\n"
     ]
    }
   ],
   "source": [
    "# report which Python interpreter this notebook's kernel is running\n",
    "import sys\n",
    "print(\"Python Version: {}\".format(sys.version))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Root directory under which the data directory will be located.\n",
    "# Built from the current user's home directory rather than a hard-coded\n",
    "# absolute path, so the notebook is not tied to one machine or user name.\n",
    "# Relies on `os`, which is imported in the cell above.\n",
    "PATH = os.path.expanduser(\"~/ds\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# create data directory if it doesn't exist\n",
    "# remember to comment out once you've created it, so you don't run this cell again\n",
    "# os.makedirs(PATH+'/data', exist_ok=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[34massessments\u001b[m\u001b[m        \u001b[34mdata\u001b[m\u001b[m               \u001b[34mmy_repos\u001b[m\u001b[m           \u001b[34mtensor_flow\u001b[m\u001b[m\r\n",
      "\u001b[34maws_cloud_guru\u001b[m\u001b[m     \u001b[34mgit_work\u001b[m\u001b[m           \u001b[34msoftware_carpentry\u001b[m\u001b[m\r\n"
     ]
    }
   ],
   "source": [
    "!ls {PATH}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/Users/reshamashaikh/ds/my_repos/fastai_deeplearn_part1/code\r\n"
     ]
    }
   ],
   "source": [
    "!pwd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# create directory for dogscats project; only have to do this once\n",
    "#os.makedirs(PATH+'/data/dogscats', exist_ok=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[34mdogscats\u001b[m\u001b[m\r\n"
     ]
    }
   ],
   "source": [
    "!ls {PATH}/data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# go to this directory in terminal\n",
    "# !ls {PATH}/data/dogscats"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#!kg config -g -u 'reshamashaikh' -p 'xxx' -c dogs-vs-cats"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#!kg download"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Note:  You can also do this in your terminal.  \n",
    "```bash-q\n",
    "~/ds/data/dogscats                                                                                \n",
    "▶ kg config -g -u 'reshamashaikh' -p 'xxx' -c dogs-vs-cats  \n",
    "\n",
    "~/ds/data/dogscats                                                                                \n",
    "▶ kg download\n",
    "downloading https://www.kaggle.com/c/dogs-vs-cats/download/sampleSubmission.csv\n",
    "\n",
    "sampleSubmission.csv 100% |###################################| Time: 0:00:00 135.5 KiB/s\n",
    "\n",
    "downloading https://www.kaggle.com/c/dogs-vs-cats/download/test1.zip\n",
    "\n",
    "test1.zip 100% |#######################################################################| Time: 0:04:39 993.8 KiB/s\n",
    "\n",
    "downloading https://www.kaggle.com/c/dogs-vs-cats/download/train.zip\n",
    "\n",
    "train.zip 100% |#######################################################################| Time: 0:12:57 715.5 KiB/s\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "total 1667896\r\n",
      "drwxr-xr-x  5 reshamashaikh  staff        170 Nov 18 14:01 \u001b[34m.\u001b[m\u001b[m\r\n",
      "drwxr-xr-x  3 reshamashaikh  staff        102 Nov 18 14:01 \u001b[34m..\u001b[m\u001b[m\r\n",
      "-rw-r--r--  1 reshamashaikh  staff  569546721 Nov 18 13:55 train.zip\r\n",
      "-rw-r--r--  1 reshamashaikh  staff  284321224 Nov 18 13:42 test1.zip\r\n",
      "-rw-r--r--  1 reshamashaikh  staff      88903 Nov 18 13:37 sampleSubmission.csv\r\n"
     ]
    }
   ],
   "source": [
    " !ls -alt {PATH}/data/dogscats"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# to install on Linux\n",
    "# sudo apt install unzip"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#!unzip {PATH}/data/dogscats/train.zip"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Example of printout when not using <kbd> -q </kbd> flag\n",
    "```bash\n",
    "Archive:  /Users/reshamashaikh/ds/data/dogscats/train.zip\n",
    "   creating: train/\n",
    "  inflating: train/cat.0.jpg         \n",
    "  inflating: train/cat.1.jpg         \n",
    "  inflating: train/cat.10.jpg        \n",
    "  inflating: train/cat.100.jpg \n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "unzip:  cannot find or open /Users/reshamashaikh/ds/data/dogscats/test.zip, /Users/reshamashaikh/ds/data/dogscats/test.zip.zip or /Users/reshamashaikh/ds/data/dogscats/test.zip.ZIP.\r\n"
     ]
    }
   ],
   "source": [
    "# run at terminal, might be faster\n",
    "#!unzip -q {PATH}/data/dogscats/test1.zip"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### move **dogs** photos to separate directory\n",
    "```bash\n",
    "mv dog.* dogs\n",
    "mv dog.9* dogs\n",
    "mv dog.8* dogs\n",
    "mv dog.* dogs\n",
    "ls dogs/ | wc -l\n",
    "```\n",
    "#### move **cats** photos to separate directory\n",
    "```bash\n",
    "mv cat.* cats\n",
    "mv cat.9* cats\n",
    "mv cat.8* cats\n",
    "mv cat.* cats\n",
    "ls cats/ | wc -l\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   12500\r\n"
     ]
    }
   ],
   "source": [
    "!ls train/dogs/ | wc -l"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   12500\r\n"
     ]
    }
   ],
   "source": [
    "!ls train/cats/ | wc -l"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# see how many files in each directory\n",
    "#!ls valid/cats/ | wc -l\n",
    "!ls"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# want to create a \"valid\" directory for validation.  \n",
    "# Advice from Jeremy:  \n",
    "#    separate TEST data into VALIDATION TASK: move 1000 each dogs / cats into valid\n",
    "# os.makedirs(PATH+'/data/dogscats/valid', exist_ok=True)\n",
    "\n",
    "# use this directory to store models\n",
    "# os.makedirs(PATH+'/data/dogscats/models', exist_ok=True)\n",
    "\n",
    "# use this directory for small sub-sample; makes it easier to run test models\n",
    "# COPY 100 pictures from train directory to \"sample\"\n",
    "#os.makedirs(PATH+'/data/dogscats/sample', exist_ok=True)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Symbolic Links\n",
    "Here’s an example. Let’s say you wanted to create a symbolic link in your Desktop folder that points to your Downloads folder. You’d run the following command:\n",
    "\n",
    "<kbd> ln -s /Users/name/Downloads /Users/name/Desktop </kbd>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Create a small sample dataset\n",
    "These are shell commands, not Python, so run them in a terminal from the directory that contains `data/`. `shuf` is part of GNU coreutils (standard on Linux; not installed by default on macOS).\n",
    "```bash\n",
    "# set sample data\n",
    "mkdir -p data/dogscats_sample/{valid,train}/{cats,dogs}\n",
    "shuf -n 200 -e data/dogscats/train/cats/* | xargs -I {} cp {} data/dogscats_sample/train/cats\n",
    "shuf -n 200 -e data/dogscats/train/dogs/* | xargs -I {} cp {} data/dogscats_sample/train/dogs\n",
    "shuf -n 100 -e data/dogscats/valid/cats/* | xargs -I {} cp {} data/dogscats_sample/valid/cats\n",
    "shuf -n 100 -e data/dogscats/valid/dogs/* | xargs -I {} cp {} data/dogscats_sample/valid/dogs\n",
    "```\n",
    "\n",
    "In your notebook, change the PATH to `PATH = \"data/dogscats_sample/\"`"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [conda root]",
   "language": "python",
   "name": "conda-root-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
